## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ dplyr 1.0.7
## ✓ tibble 3.1.4 ✓ stringr 1.4.0
## ✓ tidyr 1.1.3 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1'
## (as 'lib' is unspecified)
## Skipping install of 'leaflet.providers' from a github remote, the SHA1 (86765f12) has not changed since last install.
## Use `force = TRUE` to force installation
## Rows: 22 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): District_Name, Latino%, White%, Black%, Native_American%, Asian%, O...
## dbl (2): District_No, Native_American
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 13530 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): LAST_NME, FIRST_NME, EMPLOYEE_POSITION, CPD_UNIT_ASSIGNED_NO, UNITA...
## dbl (2): AGE, STAR_NO
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 125581 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): gender, race, current_rank, complaint_category, recommended_findi...
## dbl (7): row_id, cr_id, birth_year, current_unit, current_star, recommende...
## lgl (2): middle_initial, middle_initial2
## date (1): appointed_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 48214 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): gender, race
## dbl (2): cr_id, age
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 131142 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): beat, location_code, address_number, street, apartment_number, cit...
## dbl (2): row_id, cr_id
## date (3): incident_date, complaint_date, closed_date
## time (1): incident_time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tally the rows of complaints_accused for each unit numbered 1-25,
# listing the units with the most rows first.
district_complaints <- complaints_accused %>%
  filter(current_unit %in% 1:25) %>%
  count(current_unit, name = "n", sort = TRUE)
# Attach the district demographics to the per-unit complaint counts (keeping
# unmatched rows from both sides) and express the counts relative to each
# district's Population.
total_district_complaints <- district_complaints %>%
  full_join(
    district_demographics,
    by = c("current_unit" = "District_No")
  ) %>%
  mutate(complaints_per_capita = n / Population)
# Lollipop chart of complaints per capita for every district that has a name,
# ordered by rate and coloured by the district's Majority value.
total_district_complaints %>%
  filter(!is.na(District_Name)) %>%
  # Build the ordered factor once so the points and the stems share it.
  mutate(district = fct_reorder(District_Name, complaints_per_capita)) %>%
  ggplot(aes(x = district, y = complaints_per_capita, color = Majority)) +
  geom_point() +
  geom_segment(aes(xend = district, y = 0, yend = complaints_per_capita)) +
  coord_flip() +
  labs(
    title = "Complaints per Capita by District Name",
    subtitle = "Colored by Racial Majority",
    x = "District Name",
    y = "Complaints Per Capita"
  )

# Row-level complaint records for units 1-25, grouped by unit (not summarised).
district_complaints_1 <- group_by(
  filter(complaints_accused, current_unit %in% 1:25),
  current_unit
)

# Same records with the district demographics attached; rows without a match
# on either side are kept.
total_district_complaints_findings <- district_complaints_1 %>%
  full_join(
    district_demographics,
    by = c("current_unit" = "District_No")
  )
#stat = "identity"
# Collapse the final_finding codes into four broader decisions and count the
# records for each decision within each district. Codes not listed below
# become NA.
data1 <- total_district_complaints_findings %>%
  mutate(
    final_decision = factor(case_when(
      final_finding %in% c("SU", "DIS") ~ "Sustained",
      is.na(final_finding) ~ "Missing",
      final_finding %in% c("NAF", "NC") ~ "No Affidavit or Cooperation",
      final_finding %in% c("NS", "EX", "UN") ~ "Not Sustained"
    ))
  ) %>%
  group_by(final_decision, District_Name) %>%
  summarise(n = n())
## `summarise()` has grouped output by 'final_decision'. You can override using the `.groups` argument.
#data1 %>%
# group_by(final_decision) %>%
#summarize(n = n())
#reorder so that missing is at the end and change colors, take out the NA
#ggplot(data = data1, aes(fill = factor(final_decision, levels = c("Not Sustained", "No Affidavit or Cooperation", "Missing", "Sustained")),
# x = fct_relevel(District_Name), district_levels),
# y = n)) +
#geom_bar(position = "fill", stat = "identity") +
#theme_minimal() +
#scale_fill_viridis_d() +
#coord_flip() +
# labs(title = "Proportion of Final Findings",
# subtitle = "By Neighborhood",
# x = "Proportion of Complaints",
# y = "District Name",
# fill = "Final Decision"
# )
# Records whose final_finding is "SU" or "DIS", counted separately for each
# finding code within each district, plus that count divided by the district's
# Population.
sustained_data <- total_district_complaints_findings %>%
  filter(final_finding %in% c("SU", "DIS")) %>%
  group_by(final_finding, District_Name, Majority, Population) %>%
  summarise(n = n()) %>%
  mutate(complaints_per_capita = n / Population)
## `summarise()` has grouped output by 'final_finding', 'District_Name', 'Majority'. You can override using the `.groups` argument.
# Sustained complaints per capita, one point per district.
#
# sustained_data holds one row per (final_finding, district), so the "SU" and
# "DIS" rows are combined first. Summing the per-finding rates equals
# total count / Population as long as Population is constant within a
# district (it comes from the demographics join).
# The rate is plotted rather than the raw count `n`, matching the title and
# axis label, and rows without a district name are dropped as in the overall
# complaints-per-capita chart.
sustained_data %>%
  filter(!is.na(District_Name)) %>%
  group_by(District_Name, Majority) %>%
  summarise(
    complaints_per_capita = sum(complaints_per_capita),
    .groups = "drop"
  ) %>%
  ggplot(mapping = aes(
    x = fct_reorder(District_Name, complaints_per_capita),
    y = complaints_per_capita,
    color = Majority
  )) +
  geom_point() +
  coord_flip() +
  labs(
    title = " Sustained Complaints per Capita by District Name",
    subtitle = "Colored by Racial Majority",
    x = "District Name",
    y = " Sustained Complaints Per Capita"
  )

# Records with a recorded final_finding other than "SU" or "DIS", counted
# separately for each finding code within each district, plus that count
# divided by the district's Population. Records with a missing final_finding
# are excluded here.
unsustained_data <- total_district_complaints_findings %>%
  filter(!is.na(final_finding), !final_finding %in% c("SU", "DIS")) %>%
  group_by(final_finding, District_Name, Majority, Population) %>%
  summarise(n = n()) %>%
  mutate(complaints_per_capita = n / Population)
## `summarise()` has grouped output by 'final_finding', 'District_Name', 'Majority'. You can override using the `.groups` argument.
# Unsustained complaints per capita, one point per district.
#
# unsustained_data holds one row per (final_finding, district), so the rows
# for the individual finding codes are combined first. Summing the
# per-finding rates equals total count / Population as long as Population is
# constant within a district (it comes from the demographics join).
# The rate is plotted rather than the raw count `n`, matching the title and
# axis label, and rows without a district name are dropped as in the overall
# complaints-per-capita chart.
unsustained_data %>%
  filter(!is.na(District_Name)) %>%
  group_by(District_Name, Majority) %>%
  summarise(
    complaints_per_capita = sum(complaints_per_capita),
    .groups = "drop"
  ) %>%
  ggplot(mapping = aes(
    x = fct_reorder(District_Name, complaints_per_capita),
    y = complaints_per_capita,
    color = Majority
  )) +
  geom_point() +
  coord_flip() +
  labs(
    title = " Unsustained Complaints per Capita by District Name",
    subtitle = "Colored by Racial Majority",
    x = "District Name",
    y = " Unsustained Complaints Per Capita"
  )

# Records with no final_finding at all, counted per district, plus that count
# divided by the district's Population.
missing_data <- total_district_complaints_findings %>%
  filter(is.na(final_finding)) %>%
  group_by(final_finding, District_Name, Majority, Population) %>%
  summarise(n = n()) %>%
  mutate(complaints_per_capita = n / Population)
## `summarise()` has grouped output by 'final_finding', 'District_Name', 'Majority'. You can override using the `.groups` argument.
# Complaints with a missing final finding, per capita, one point per district.
#
# The rate is plotted rather than the raw count `n`, matching the title and
# axis label, and rows without a district name are dropped as in the overall
# complaints-per-capita chart. The regrouping guarantees a single row per
# district regardless of how missing_data is grouped.
missing_data %>%
  filter(!is.na(District_Name)) %>%
  group_by(District_Name, Majority) %>%
  summarise(
    complaints_per_capita = sum(complaints_per_capita),
    .groups = "drop"
  ) %>%
  ggplot(mapping = aes(
    x = fct_reorder(District_Name, complaints_per_capita),
    y = complaints_per_capita,
    color = Majority
  )) +
  geom_point() +
  coord_flip() +
  labs(
    title = " Missing Complaints per Capita by District Name",
    subtitle = "Colored by Racial Majority",
    x = "District Name",
    y = " Missing Complaints Per Capita"
  )

# Police district boundary polygons read from the shapefile.
# Open question: the file appears to have no district 21 or 23, yet it
# includes a district 31?
chicago_police_district_spatial <- st_read(
  "/cloud/project/data/geo_export_2efb16ec-aa66-49b0-92a0-2d6f5e0f81d9.shp"
)
## Reading layer `geo_export_2efb16ec-aa66-49b0-92a0-2d6f5e0f81d9' from data source `/cloud/project/data/geo_export_2efb16ec-aa66-49b0-92a0-2d6f5e0f81d9.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 25 features and 2 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -87.94011 ymin: 41.64455 xmax: -87.52414 ymax: 42.02303
## Geodetic CRS: WGS84(DD)
# Per-district complaint summary joined to the district polygons, as an sf
# object in WGS 84 (EPSG:4326) ready for leaflet.
total_district_complaints_spatial <- total_district_complaints %>%
  # The percentage columns are read as text such as "12.3%"; strip the sign
  # and convert them to numbers in a single pass.
  mutate(across(
    c(`Latino%`, `White%`, `Black%`, `Asian%`, `Native_American%`, `Other%`),
    ~ as.numeric(str_remove(.x, "%"))
  )) %>%
  # The join key in the shapefile (dist_num) is matched as character.
  mutate(current_unit = as.character(current_unit)) %>%
  left_join(
    chicago_police_district_spatial,
    by = c("current_unit" = "dist_num")
  ) %>%
  st_as_sf() %>%
  # Use the numeric EPSG code: the "+init=epsg:XXXX" string is deprecated and
  # triggers a GDAL warning about axis order.
  st_transform(4326)
## Warning in CPL_crs_from_input(x): GDAL Message 1: +init=epsg:XXXX syntax is
## deprecated. It might return a CRS with a non-EPSG compliant axis order.
# Shared colour scale for the percentage maps: eight equal-width bins
# covering 0-100.
bins <- seq(from = 0, to = 100, by = 12.5)
# `domain` must describe the numeric values being coloured, not the whole
# data frame; the percentages span the same range as the bin edges.
pal_perc <- colorBin("OrRd", domain = range(bins), bins = bins)
#https://laurielbaker.github.io/DSCA_leaflet_mapping_in_r/slides/leaflet_slides3.html#58
# Base map shared by the percentage maps below: default tiles plus the
# CartoDB Positron provider tiles, with the view set to the given
# longitude/latitude and zoom level.
m <- total_district_complaints_spatial %>%
  leaflet() %>%
  addTiles() %>%
  setView(lng = -87.633506, lat = 41.876067, zoom = 9.5) %>%
  addProviderTiles(providers$CartoDB.Positron)
# Choropleth of the `Black%` column: district polygons filled using the
# shared percentage palette, with a matching legend.
Black_perc_m <- m %>%
  addPolygons(
    fillColor = ~pal_perc(`Black%`),
    fillOpacity = 1,
    color = "black",
    weight = 1,
    opacity = 0.7
  ) %>%
  addLegend(
    pal = pal_perc,
    values = ~`Black%`,
    title = "Percent Black residents",
    position = "topright",
    opacity = 1
  )
Black_perc_m
# Choropleth of the `White%` column: district polygons filled using the
# shared percentage palette, with a matching legend.
White_perc_m <- m %>%
  addPolygons(
    fillColor = ~pal_perc(`White%`),
    fillOpacity = 1,
    color = "black",
    weight = 1,
    opacity = 0.7
  ) %>%
  addLegend(
    pal = pal_perc,
    values = ~`White%`,
    title = "Percent White residents",
    position = "topright",
    opacity = 1
  )
White_perc_m
# Choropleth of the `Latino%` column: district polygons filled using the
# shared percentage palette, with a matching legend.
Latino_perc_m <- m %>%
  addPolygons(
    fillColor = ~pal_perc(`Latino%`),
    fillOpacity = 1,
    color = "black",
    weight = 1,
    opacity = 0.7
  ) %>%
  addLegend(
    pal = pal_perc,
    values = ~`Latino%`,
    title = "Percent Latino residents",
    position = "topright",
    opacity = 1
  )
Latino_perc_m
# Create a map showing the neighborhoods with the most missing data, i.e. where the final finding is NA (missing), NAF (no affidavit) or NC (presumably no cooperation; confirm against the data dictionary)